# Strain/clone key. Columns are renamed so that `strain` can be matched
# against the row names of the presence/absence table loaded below.
key1 <- read.table(
  file = './Data/Strain.clone.info.tsv',
  sep = '\t',
  header = TRUE
) %>%
  rename(parent_ST = ID, new_ST = Strain, strain = Hartwell)

# Gene presence/absence table for group 1.
g1 <- read.table(
  file = './Data/Group1 gene.presence_absence.tsv',
  sep = '\t',
  header = TRUE
)

# Join the key onto the table by row name, drop the join column and
# parent_ST, then build a combined "<new_ST>_<Clone>" label in ID2.
g2 <- g1 %>%
  mutate(strain = row.names(g1)) %>%
  left_join(key1, by = 'strain') %>%
  select(-strain, -parent_ST) %>%
  mutate(ID2 = paste(new_ST, Clone, sep = '_'))

# Sanity check for duplicated rows in g1: TRUE means unique() removed nothing.
test <- unique(g1)
nrow(g1) == nrow(test)
## [1] TRUE

# Long format for plotting: one row per (ID2, variable) pair, sorted by
# ID2 then Clone. Variables whose name contains 'group' are excluded.
g2.m <- g2 %>%
  mutate(ID2 = paste(new_ST, Clone, sep = '_')) %>%
  reshape2::melt(id.vars = c('new_ST', 'Clone', 'ID2')) %>%
  arrange(ID2, Clone) %>%
  mutate(
    value = as.factor(value),
    group = grepl('group', variable)
  ) %>%
  filter(!group)
# Plain tile heatmap with no clustering: one tile per (variable, ID2) pair,
# filled by the factor-coded value. Rows are only organised by ST.
ggplot(data = g2.m, mapping = aes(x = variable, y = ID2, fill = value)) +
  geom_tile()
Focus on genes that have a clear annotation.
# Build a matrix for the clustered heatmaps, with rows labelled by the
# combined "<new_ST>_<Clone>" identifier.
g2a <- g2 %>%
  mutate(ID2 = paste(new_ST, Clone, sep = '_'))
row.names(g2a) <- g2a$ID2

# Keep only the data columns; the identifier columns live in the row names.
mat1 <- g2a %>%
  dplyr::select(-new_ST, -Clone, -ID2) %>%
  as.matrix()

# Drop columns whose name contains 'group' (presumably the gene clusters
# without a clear annotation -- confirm against the input file).
# Guard the negative index: when grep() finds nothing it returns integer(0),
# and mat1[, -integer(0)] would silently drop EVERY column.
# drop = FALSE keeps mat1 a matrix even if a single column remains.
groupvar <- grep('group', colnames(mat1))
if (length(groupvar) > 0) {
  mat1 <- mat1[, -groupvar, drop = FALSE]
}

# Static clustered heatmap.
library(pheatmap)
p2 <- pheatmap(mat1)
p2
This takes a really long time to render.
# Interactive heatmap of mat1 via heatmaply; values are plotted unscaled and
# the colour bar is hidden.
p <- heatmaply(
  mat1,
  # dendrogram = "row",
  # Titles and axis labels are left blank.
  xlab = "",
  ylab = "",
  main = "",
  scale = "none",
  # Layout.
  margins = c(60, 100, 40, 20),
  grid_gap = 0,
  titleX = FALSE,
  hide_colorbar = TRUE,
  branches_lwd = 0.1,
  # Hover text and tick labels.
  label_names = c("Serotype/isolate", "Feature:", "Value"),
  fontsize_row = 5,
  fontsize_col = 5,
  labCol = colnames(mat1),
  labRow = rownames(mat1),
  # Remove the axis lines from the heatmap panel.
  heatmap_layers = theme(axis.line = element_blank())
)
p